package org.sjcx.jsoup;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import my.common.WriteToFiles;

/**
 * Scraper for product-detail pages of xiaozhu168.com.
 *
 * <p>Target pages: {@code http://www.xiaozhu168.com/pro-details-$.html}, where
 * {@code $} is a numeric page id taken from the static {@link #index} counter.
 * For every page that answers with HTTP 200 and contains a {@code .tag-type}
 * element, a handful of fields are extracted into {@link #map}, printed to
 * standard output and handed to {@code WriteToFiles.writerText}.
 *
 * <p>The class keeps its working state in static and instance fields and makes
 * no attempt at synchronization.
 */
public class ScoopXZ168 {

	/** Charset name used to decode fetched pages. */
	private static final String type = "UTF-8";
	/** Connect/read timeout for every HTTP request, in milliseconds. */
	private static final int TIMEOUT_MS = 8000;
	/** Pause after a page that did not answer with HTTP 200, in milliseconds. */
	private static final long RETRY_PAUSE_MS = 5000L;

	// Not referenced anywhere in this class; kept because it is package-visible.
	static URL _url = null;
	/** Connection used by the most recent {@link #urlCommon(String)} call. */
	HttpURLConnection connection = null;
	/** Document parsed by the most recent {@link #urlCommon(String)} call; null if that call failed. */
	Document doc = null;
	private static WriteToFiles wtf = new WriteToFiles();

	/**
	 * Fetches a page with a GET request and parses it with jsoup.
	 *
	 * @param hrefUrl absolute URL of the page; also used as the document's base URI
	 * @return the parsed document, or {@code null} when the request or the parsing
	 *         failed with an {@link IOException} (the stack trace is printed)
	 */
	private Document urlCommon( String hrefUrl ) {
		Document parsed = null;
		HttpURLConnection conn = null;

		try {
			URL url = new URL( hrefUrl );
			conn = (HttpURLConnection)url.openConnection();
			connection = conn;
			// GET is already the default; set explicitly for clarity.
			conn.setRequestMethod("GET");
			// Caching is on by default; turn it off.
			conn.setUseCaches(false);
			// Request header: ask the server not to keep the connection alive.
			conn.addRequestProperty("Connection", "close");
			// Timeout for connecting to the host (milliseconds).
			conn.setConnectTimeout(TIMEOUT_MS);
			// Timeout for reading data from the host (milliseconds).
			conn.setReadTimeout(TIMEOUT_MS);
			// Send the request and parse the response body.
			parsed = Jsoup.parse(conn.getInputStream(), type, hrefUrl);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Always release the connection, whether or not the fetch succeeded.
			if( conn != null ){
				conn.disconnect();
			}
		}

		// Publish the result of THIS call only. Returning the old field value
		// after a failure would hand the caller the previous page's document.
		doc = parsed;
		return parsed;
	}

	public static void main(String[] args) {
		new ScoopXZ168().URLData();
	}

	/** Fields extracted from the page currently being processed. */
	HashMap<String,String> map = new HashMap<String,String>();
	static int  i = 1; // sequence number of the next record written
	static int index = 1; // page counter; incremented BEFORE each request
	/**
	 * Walks the product-detail pages one id at a time and records each page
	 * that can be parsed.
	 *
	 * <p>Per page: probe the status code; on anything but 200 pause
	 * {@value #RETRY_PAUSE_MS} ms and move on to the next id. Pages that cannot
	 * be fetched or that lack a {@code .tag-type} element are skipped.
	 *
	 * <p>NOTE(review): the loop has no end condition of its own — once the ids
	 * run past the last existing page it keeps probing forever. Confirm whether
	 * that is intended.
	 *
	 * @return {@code false} when the thread is interrupted while pausing; the
	 *         method does not return otherwise
	 */
	public boolean URLData(){
		while (true) {
			// NOTE(review): because of the pre-increment and index starting at 1,
			// the first page requested is pro-details-2 — confirm page 1 is
			// deliberately skipped.
			++index;
			String hrefUrl = "http://www.xiaozhu168.com/pro-details-"+index+".html";
			int status = 0;
			try {
				status = getHttpResponseCode( hrefUrl );
			} catch (MalformedURLException e1) {
				e1.printStackTrace();
			}

			if( status != 200 ){
				try {
					Thread.sleep(RETRY_PAUSE_MS);
				} catch (InterruptedException e) {
					// Restore the flag and stop: continuing would make every
					// following sleep fail immediately.
					Thread.currentThread().interrupt();
					return false;
				}
				continue;
			}

			Document xz168 = urlCommon( hrefUrl );
			if( xz168 == null ){
				// Fetch failed after a successful probe; skip this page.
				continue;
			}

			map.put("url" , hrefUrl);
			// Name of the offering.
			String tagName = xz168.select(".tag-type").text();

			// No (class = "tag-type") content on the page: skip it. A plain
			// continue replaces the former recursive call, which grew the stack
			// by one frame per skipped page.
			if("".equals(tagName)){
				continue;
			}
			map.put("tagName", parseTagName( tagName ));

			// Expected annualized return: keep everything up to and including '%'
			// (empty string when there is no '%').
			String profit = xz168.select(".tag-profit strong").text();
			profit = profit.substring(0, profit.indexOf("%")+1);
			map.put("profit", profit);

			// Term.
			String date = xz168.select(".tag-limit strong").text();
			date = date.replace(" ", "");
			map.put("date", date);

			// Project amount.
			String account = xz168.select(".tag-count strong").text();
			map.put("account", account);

			map.put("index", i+"");
			i++;

			System.out.println( map.toString() );
			wtf.writerText("F:/demo/", map.toString()+"\n", "xz168.txt");
		}
	}

	/**
	 * Cuts the raw {@code .tag-type} text down to the name part.
	 *
	 * <p>If the text contains "可", the part before it is returned with spaces
	 * replaced by underscores. Otherwise, if it contains "开", the part before
	 * that is returned trimmed. If neither marker is present the whole text is
	 * returned trimmed instead of failing.
	 *
	 * @param tagName non-empty text of the {@code .tag-type} element
	 * @return the extracted name
	 */
	private static String parseTagName( String tagName ) {
		int cut = tagName.indexOf("可");
		if( cut >= 0 ){
			return tagName.substring(0, cut).replaceAll(" ", "_");
		}
		cut = tagName.indexOf("开");
		if( cut >= 0 ){
			return tagName.substring(0, cut).trim();
		}
		return tagName.trim();
	}

	/**
	 * Probes a URL and reports its HTTP status code, which the caller uses to
	 * decide whether the resource exists.
	 *
	 * @param hrefUrl URL to probe
	 * @return the HTTP response code, or {@code -1} when the URL is not an HTTP
	 *         URL or the request failed
	 * @throws MalformedURLException if {@code hrefUrl} cannot be parsed as a URL
	 */
	public int getHttpResponseCode(String hrefUrl) throws MalformedURLException {
		HttpURLConnection httpurlconnection = null;
		int responsecode = -1;

		URL url = new URL(hrefUrl);
		try {
			URLConnection urlconnection = url.openConnection();
			if (!(urlconnection instanceof HttpURLConnection)) {
				// Not HTTP: nothing to probe, and nothing has been opened yet.
				return responsecode;
			}

			httpurlconnection = (HttpURLConnection) urlconnection;
			// Without timeouts an unresponsive host would block the crawl forever.
			httpurlconnection.setConnectTimeout(TIMEOUT_MS);
			httpurlconnection.setReadTimeout(TIMEOUT_MS);
			httpurlconnection.connect();

			responsecode = httpurlconnection.getResponseCode();
		} catch (Exception ignored) {
			// Best-effort probe: any failure is reported as -1.
		} finally {
			// The body is never read here, so release the connection for every
			// outcome, including 200 and redirects.
			if (httpurlconnection != null) {
				httpurlconnection.disconnect();
			}
		}
		return responsecode;
	}

	/** Prints {@code String.format(msg, args)} followed by a newline to standard output. */
	private static void print(String msg, Object... args) {
		System.out.println(String.format(msg, args));
	}

	/**
	 * Shortens a string to at most {@code width} characters; when it is cut, the
	 * last kept position is filled with ".".
	 *
	 * @param s     string to shorten
	 * @param width maximum length of the result
	 * @return {@code s} itself if it fits, otherwise the shortened form; an empty
	 *         string when {@code width} is less than 1
	 */
	private static String trim(String s, int width) {
		if (width < 1) {
			// No room for even the "." marker; avoids a negative substring index.
			return "";
		}
		if (s.length() > width) {
			return s.substring(0, width-1) + ".";
		}
		return s;
	}

}
